import pandas as pd
import plotly.graph_objs as go
import plotly.express as px
import plotly.io as pio
# Render every figure created below with Plotly's white theme.
pio.templates.default = "plotly_white"

# Load the reach dataset; the file is decoded with the latin-1 codec.
REACH_CSV = "Instagram-Reach.csv"
data = pd.read_csv(REACH_CSV, encoding="latin-1")

# Quick sanity check of the first rows.
print(data.head())
# Output:
#                   Date  Instagram reach
# 0  2022-04-01T00:00:00             7620
# 1  2022-04-02T00:00:00            12859
# 2  2022-04-03T00:00:00            16008
# 3  2022-04-04T00:00:00            24349
# 4  2022-04-05T00:00:00            20532
# Convert the timestamp strings in "Date" to datetime values so that the
# .dt accessor and time-aware plotting can be used later on.
parsed_dates = pd.to_datetime(data["Date"])
data["Date"] = parsed_dates

print(data.head())
# Output:
#         Date  Instagram reach
# 0 2022-04-01             7620
# 1 2022-04-02            12859
# 2 2022-04-03            16008
# 3 2022-04-04            24349
# 4 2022-04-05            20532
# Analyze the trend of Instagram reach over time using a line chart.
reach_line = go.Scatter(
    x=data["Date"],
    y=data["Instagram reach"],
    mode="lines",
    name="Instagram reach",
)
fig = go.Figure(data=[reach_line])
fig.update_layout(
    title="Instagram Reach Trend",
    xaxis_title="Date",
    yaxis_title="Instagram Reach",
)
fig.show()
# Analyze Instagram reach for each day using a bar chart (one bar per row).
reach_bars = go.Bar(
    x=data["Date"],
    y=data["Instagram reach"],
    name="Instagram reach",
)
fig = go.Figure(data=[reach_bars])
fig.update_layout(
    title="Instagram Reach by Day",
    xaxis_title="Date",
    yaxis_title="Instagram Reach",
)
fig.show()
# Analyze the distribution of Instagram reach using a box plot.
reach_box = go.Box(
    y=data["Instagram reach"],
    name="Instagram reach",
)
fig = go.Figure(data=[reach_box])
fig.update_layout(
    title="Instagram Reach Box Plot",
    yaxis_title="Instagram Reach",
)
fig.show()
# Derive the weekday name of every date so reach can be compared per day
# of the week.
weekday_names = data["Date"].dt.day_name()
data["Day"] = weekday_names

print(data.head())
# Output:
#         Date  Instagram reach       Day
# 0 2022-04-01             7620    Friday
# 1 2022-04-02            12859  Saturday
# 2 2022-04-03            16008    Sunday
# 3 2022-04-04            24349    Monday
# 4 2022-04-05            20532   Tuesday
import numpy as np

# Summarise reach per weekday: mean, median and standard deviation.
# groupby() sorts its keys, so the rows come out in alphabetical day order.
reach_by_weekday = data.groupby("Day")["Instagram reach"]
day_stats = reach_by_weekday.agg(["mean", "median", "std"]).reset_index()

print(day_stats)
# Output:
#          Day          mean   median           std
# 0     Friday  46666.849057  35574.0  29856.943036
# 1     Monday  52621.692308  46853.0  32296.071347
# 2   Saturday  47374.750000  40012.0  27667.043634
# 3     Sunday  53114.173077  47797.0  30906.162384
# 4   Thursday  48570.923077  39150.0  28623.220625
# 5    Tuesday  54030.557692  48786.0  32503.726482
# 6  Wednesday  51017.269231  42320.5  29047.869685
# Create a grouped bar chart comparing the mean, median and standard
# deviation of reach for each day of the week.
#
# day_stats comes from a groupby(), which sorts the day names
# alphabetically (Friday, Monday, Saturday, ...). Pin the x-axis categories
# to calendar order so the chart reads Monday -> Sunday.
weekday_order = [
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
]

fig = go.Figure()
# One bar series per statistic: (day_stats column, legend label).
for stat_column, legend_label in (
    ("mean", "Mean"),
    ("median", "Median"),
    ("std", "Standard Deviation"),
):
    fig.add_trace(
        go.Bar(
            x=day_stats["Day"],
            y=day_stats[stat_column],
            name=legend_label,
        )
    )
fig.update_layout(
    title="Instagram Reach by Day of the Week",
    xaxis_title="Day",
    yaxis_title="Instagram Reach",
)
# Explicit category order; days missing from the data are simply not drawn.
fig.update_xaxes(categoryorder="array", categoryarray=weekday_order)
fig.show()
# Look at the trends and seasonal patterns of Instagram reach.
from plotly.tools import mpl_to_plotly
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose

# Keep only the two columns the time-series analysis needs (this drops the
# helper "Day" column added earlier).
data = data[["Date", "Instagram reach"]]

# Multiplicative decomposition with a period of 100 observations.
result = seasonal_decompose(
    data["Instagram reach"],
    model="multiplicative",
    period=100,
)

# result.plot() builds and returns its own Matplotlib figure, so no figure
# is created beforehand. The previous plt.figure() call only produced an
# empty, orphaned "Figure ... with 0 Axes".
fig = result.plot()

# Convert the Matplotlib figure to a Plotly figure for interactive display.
# NOTE(review): mpl_to_plotly is a legacy converter -- confirm it works with
# the installed matplotlib version.
fig = mpl_to_plotly(fig)
fig.show()
<Figure size 640x480 with 0 Axes>
# Autocorrelation of the reach series across all lags.
reach_series = data["Instagram reach"]
pd.plotting.autocorrelation_plot(reach_series)
<AxesSubplot:xlabel='Lag', ylabel='Autocorrelation'>
from statsmodels.graphics.tsaplots import plot_pacf

# Partial autocorrelation of the reach series for the first 100 lags.
PACF_LAGS = 100
plot_pacf(data["Instagram reach"], lags=PACF_LAGS)
# ARIMA orders: autoregressive (p), differencing (d), moving-average (q).
p, d, q = 8, 1, 2
import statsmodels.api as sm
import warnings

# The same (p, d, q) orders are reused for the seasonal component, with a
# seasonal period of 12 observations.
# NOTE(review): the recorded run of this cell emitted a ConvergenceWarning
# and reported a near-singular covariance matrix -- the seasonal orders may
# be over-parameterised for this series; worth revisiting.
sarimax_spec = sm.tsa.statespace.SARIMAX(
    data["Instagram reach"],
    order=(p, d, q),
    seasonal_order=(p, d, q, 12),
)

# `model` holds the fitted results object from here on; the forecasting
# step calls model.predict() on it.
model = sarimax_spec.fit()
print(model.summary())
C:\Users\Beytullah\anaconda3\lib\site-packages\statsmodels\base\model.py:604: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
SARIMAX Results
==========================================================================================
Dep. Variable: Instagram reach No. Observations: 365
Model: SARIMAX(8, 1, 2)x(8, 1, 2, 12) Log Likelihood -3938.519
Date: Mon, 31 Jul 2023 AIC 7919.039
Time: 14:30:07 BIC 8000.175
Sample: 0 HQIC 7951.327
- 365
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.1902 7.117 0.027 0.979 -13.758 14.138
ar.L2 0.4715 6.609 0.071 0.943 -12.482 13.425
ar.L3 -0.1178 1.517 -0.078 0.938 -3.091 2.856
ar.L4 0.0414 0.284 0.146 0.884 -0.515 0.598
ar.L5 -0.0211 0.187 -0.113 0.910 -0.388 0.346
ar.L6 0.0309 0.277 0.111 0.911 -0.513 0.575
ar.L7 0.0086 0.442 0.019 0.984 -0.857 0.874
ar.L8 -0.0135 0.248 -0.055 0.957 -0.500 0.473
ma.L1 -0.2245 7.113 -0.032 0.975 -14.165 13.716
ma.L2 -0.7087 6.831 -0.104 0.917 -14.096 12.679
ar.S.L12 -1.1027 1.516 -0.728 0.467 -4.073 1.868
ar.S.L24 -1.7695 2.255 -0.785 0.433 -6.188 2.649
ar.S.L36 -1.4525 1.937 -0.750 0.453 -5.249 2.344
ar.S.L48 -1.1027 1.581 -0.697 0.486 -4.202 1.997
ar.S.L60 -0.7956 1.130 -0.704 0.481 -3.010 1.419
ar.S.L72 -0.4568 0.798 -0.573 0.567 -2.020 1.106
ar.S.L84 -0.2267 0.506 -0.448 0.654 -1.219 0.765
ar.S.L96 -0.0550 0.247 -0.223 0.824 -0.539 0.429
ma.S.L12 0.2403 1.516 0.158 0.874 -2.732 3.213
ma.S.L24 0.8331 1.307 0.638 0.524 -1.728 3.394
sigma2 4.863e+08 1.69e-07 2.88e+15 0.000 4.86e+08 4.86e+08
===================================================================================
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB): 214.11
Prob(Q): 0.93 Prob(JB): 0.00
Heteroskedasticity (H): 0.72 Skew: 0.29
Prob(H) (two-sided): 0.07 Kurtosis: 6.78
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 6.62e+31. Standard errors may be unstable.
# Make predictions using the model and have a look at the forecasted reach.
# predict() treats both bounds as inclusive, so this yields 101 steps
# beyond the end of the training data.
predictions = model.predict(len(data), len(data) + 100)

# The model was fitted on a positionally indexed series, so predictions are
# indexed by row position rather than by date. Map them onto calendar dates
# so the x-axis really shows what its "Date" title says.
# NOTE(review): assumes rows are in chronological order at a daily
# frequency (as the printed head of the data suggests) -- confirm.
forecast_dates = pd.date_range(
    start=data["Date"].iloc[-1] + pd.Timedelta(days=1),
    periods=len(predictions),
    freq="D",
)

trace_train = go.Scatter(
    x=data["Date"],
    y=data["Instagram reach"],
    mode="lines",
    name="Training Data",
)
trace_pred = go.Scatter(
    x=forecast_dates,
    y=predictions,
    mode="lines",
    name="Predictions",
)
layout = go.Layout(
    title="Instagram Reach Time Series and Predictions",
    xaxis_title="Date",
    yaxis_title="Instagram Reach",
)
fig = go.Figure(data=[trace_train, trace_pred], layout=layout)
fig.show()